Opera 18 days REIMAGED - Preprocessing QC statistics ¶

Aug 2025¶

Rerun by Sagy on Sep 16 (2025) - keeping only WT Untreated and FUS lines, removing CD41

In [2]:
import os
import sys

# Root of the NOVA checkout; every other path in this notebook is derived from it.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = os.path.join(NOVA_HOME, 'input')

# Publish the root through the environment and put it on sys.path so the
# project-local `tools` package imported below can be resolved.
os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, NOVA_HOME)
print(f"NOVA_HOME: {NOVA_HOME}")

# Preprocessing log folder for this dataset, and the plots sub-folder handed to the QC helpers.
LOGS_PATH = os.path.join(NOVA_HOME, "outputs/preprocessing/ManuscriptFinalData_80pct/neuronsDay18/logs/")
PLOT_PATH = os.path.join(NOVA_HOME, "outputs/preprocessing/ManuscriptFinalData_80pct/neuronsDay18/logs/plots")

# Run the remainder of the notebook from the repository root.
os.chdir(NOVA_HOME)
import pandas as pd
import contextlib
import io
from IPython.display import display, Javascript

from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
                                                show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
                                                show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
                                                calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
                                                plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
                                                run_calc_hist_new
                                                
from tools.preprocessing_tools.qc_reports.qc_config import opera18days_panels, opera18days_markers, opera18days_marker_info, \
                                                opera18days_cell_lines, opera18days_cell_lines_to_cond,\
                                                opera18days_cell_lines_for_disp, opera18days_reps, \
                                                opera18days_line_colors, opera18days_lines_order, \
                                                opera18days_custom_palette, opera18days_expected_dapi_raw, \
                                                markers
# Enable IPython's autoreload extension in mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA
In [3]:
# Load the preprocessing logs found under LOGS_PATH into `df`.
df = log_files_qc(
    LOGS_PATH,
    only_wt_cond=False,
    filename_split='-',
    site_location=0,
)

# Restrict the report to the WT and FUS cell lines, untreated condition only.
kept_cell_lines = ['WT', 'FUSHomozygous', 'FUSHeterozygous','FUSRevertant']
df = df[df.cell_line.isin(kept_cell_lines) & (df.condition == 'Untreated')]

# Split the rows into the DAPI marker and every other (target) marker.
is_dapi = df.marker == 'DAPI'
df_dapi = df[is_dapi]
df_target = df[~is_dapi]
reading logs of batch2
reading logs of batch1

Total of 4 files were read.
Before dup handeling  (131158, 21)
After duplication removal #1: (120135, 22)
After duplication removal #2: (120135, 22)
In [4]:
# Batches covered by every QC step below.
batches = [f'batch{batch_number}' for batch_number in (1, 2)]
batches
Out[4]:
['batch1', 'batch2']

Actual Files Validation¶

Raw Files Validation¶

  1. How many site tiff files do we have in each folder?
  2. Are all existing files valid? (tif or tiff, at least 1MB, not corrupted)
In [5]:
# Raw images of this dataset: <NOVA_DATA_HOME>/images/raw/Opera18DaysReimaged_sorted
root_directory_raw = os.path.join(NOVA_DATA_HOME, 'images', 'raw', 'Opera18DaysReimaged_sorted')

# Strip the "_16bit_no_downsample" suffix from batch names that carry it;
# names without the suffix pass through unchanged.
batches_raw = [name.replace("_16bit_no_downsample","") for name in batches]

# NOTE(review): the second positional argument is False here and True in the
# processed-files call; presumably it selects raw vs. processed validation -
# confirm against qc_utils.
# NOTE(review): the markers list is passed as a copy, presumably because the
# helper may modify it in place - confirm.
raws = run_validate_folder_structure(
    root_directory_raw,
    False,
    opera18days_panels,
    opera18days_markers.copy(),
    PLOT_PATH,
    opera18days_marker_info,
    opera18days_cell_lines_to_cond,
    opera18days_reps,
    opera18days_cell_lines_for_disp,
    opera18days_expected_dapi_raw,
    batches=batches_raw,
    fig_height=12,
)
batch1
Folder structure is valid.
No bad files are found.
Total Sites:  32000
df_reset (58, 5) colored_df (58, 5)
               Rep WT_Untreated FUSHomozygous_Untreated  \
Marker                                                    
G3BP1         rep1          100                     100   
G3BP1         rep2          100                     100   
NONO          rep1          100                     100   
NONO          rep2          100                     100   
SQSTM1        rep1          100                     100   
SQSTM1        rep2          100                     100   
PSD95         rep1          100                     100   
PSD95         rep2          100                     100   
NEMO          rep1          100                     100   
NEMO          rep2          100                     100   
GM130         rep1          100                     100   
GM130         rep2          100                     100   
NCL           rep1          100                     100   
NCL           rep2          100                     100   
ANXA11        rep1          100                     100   
ANXA11        rep2          100                     100   
Calreticulin  rep1          100                     100   
Calreticulin  rep2          100                     100   
mitotracker   rep1          100                     100   
mitotracker   rep2          100                     100   
KIF5A         rep1          100                     100   
KIF5A         rep2          100                     100   
TDP43         rep1          100                     100   
TDP43         rep2          100                     100   
FMRP          rep1          100                     100   
FMRP          rep2          100                     100   
CLTC          rep1          100                     100   
CLTC          rep2          100                     100   
DCP1A         rep1          100                     100   
DCP1A         rep2          100                     100   
TOMM20        rep1          100                     100   
TOMM20        rep2          100                     100   
FUS           rep1          100                     100   
FUS           rep2          100                     100   
SNCA          rep1          100                     100   
SNCA          rep2          100                     100   
LAMP1         rep1          100                     100   
LAMP1         rep2          100                     100   
PML           rep1          100                     100   
PML           rep2          100                     100   
PURA          rep1          100                     100   
PURA          rep2          100                     100   
Phalloidin    rep1          100                     100   
Phalloidin    rep2          100                     100   
PEX14         rep1          100                     100   
PEX14         rep2          100                     100   
Tubulin       rep1          100                     100   
Tubulin       rep2          100                     100   
PSPC1         rep1          100                     100   
PSPC1         rep2          100                     100   
VDAC1         rep1          100                     100   
VDAC1         rep2          100                     100   
AGO2          rep1          100                     100   
AGO2          rep2          100                     100   
HNRNPA1       rep1          100                     100   
HNRNPA1       rep2          100                     100   
DAPI          rep1         1200                    1200   
DAPI          rep2         1200                    1200   

             FUSHeterozygous_Untreated FUSRevertant_Untreated  
Marker                                                         
G3BP1                              100                    100  
G3BP1                              100                    100  
NONO                               100                    100  
NONO                               100                    100  
SQSTM1                             100                    100  
SQSTM1                             100                    100  
PSD95                              100                    100  
PSD95                              100                    100  
NEMO                               100                    100  
NEMO                               100                    100  
GM130                              100                    100  
GM130                              100                    100  
NCL                                100                    100  
NCL                                100                    100  
ANXA11                             100                    100  
ANXA11                             100                    100  
Calreticulin                       100                    100  
Calreticulin                       100                    100  
mitotracker                        100                    100  
mitotracker                        100                    100  
KIF5A                              100                    100  
KIF5A                              100                    100  
TDP43                              100                    100  
TDP43                              100                    100  
FMRP                               100                    100  
FMRP                               100                    100  
CLTC                               100                    100  
CLTC                               100                    100  
DCP1A                              100                    100  
DCP1A                              100                    100  
TOMM20                             100                    100  
TOMM20                             100                    100  
FUS                                100                    100  
FUS                                100                    100  
SNCA                               100                    100  
SNCA                               100                    100  
LAMP1                              100                    100  
LAMP1                              100                    100  
PML                                100                    100  
PML                                100                    100  
PURA                               100                    100  
PURA                               100                    100  
Phalloidin                         100                    100  
Phalloidin                         100                    100  
PEX14                              100                    100  
PEX14                              100                    100  
Tubulin                            100                    100  
Tubulin                            100                    100  
PSPC1                              100                    100  
PSPC1                              100                    100  
VDAC1                              100                    100  
VDAC1                              100                    100  
AGO2                               100                    100  
AGO2                               100                    100  
HNRNPA1                            100                    100  
HNRNPA1                            100                    100  
DAPI                              1200                   1200  
DAPI                              1200                   1200  
========
batch2
Folder structure is valid.
No bad files are found.
Total Sites:  32000
df_reset (58, 5) colored_df (58, 5)
               Rep WT_Untreated FUSHomozygous_Untreated  \
Marker                                                    
G3BP1         rep1          100                     100   
G3BP1         rep2          100                     100   
NONO          rep1          100                     100   
NONO          rep2          100                     100   
SQSTM1        rep1          100                     100   
SQSTM1        rep2          100                     100   
PSD95         rep1          100                     100   
PSD95         rep2          100                     100   
NEMO          rep1          100                     100   
NEMO          rep2          100                     100   
GM130         rep1          100                     100   
GM130         rep2          100                     100   
NCL           rep1          100                     100   
NCL           rep2          100                     100   
ANXA11        rep1          100                     100   
ANXA11        rep2          100                     100   
Calreticulin  rep1          100                     100   
Calreticulin  rep2          100                     100   
mitotracker   rep1          100                     100   
mitotracker   rep2          100                     100   
KIF5A         rep1          100                     100   
KIF5A         rep2          100                     100   
TDP43         rep1          100                     100   
TDP43         rep2          100                     100   
FMRP          rep1          100                     100   
FMRP          rep2          100                     100   
CLTC          rep1          100                     100   
CLTC          rep2          100                     100   
DCP1A         rep1          100                     100   
DCP1A         rep2          100                     100   
TOMM20        rep1          100                     100   
TOMM20        rep2          100                     100   
FUS           rep1          100                     100   
FUS           rep2          100                     100   
SNCA          rep1          100                     100   
SNCA          rep2          100                     100   
LAMP1         rep1          100                     100   
LAMP1         rep2          100                     100   
PML           rep1          100                     100   
PML           rep2          100                     100   
PURA          rep1          100                     100   
PURA          rep2          100                     100   
Phalloidin    rep1          100                     100   
Phalloidin    rep2          100                     100   
PEX14         rep1          100                     100   
PEX14         rep2          100                     100   
Tubulin       rep1          100                     100   
Tubulin       rep2          100                     100   
PSPC1         rep1          100                     100   
PSPC1         rep2          100                     100   
VDAC1         rep1          100                     100   
VDAC1         rep2          100                     100   
AGO2          rep1          100                     100   
AGO2          rep2          100                     100   
HNRNPA1       rep1          100                     100   
HNRNPA1       rep2          100                     100   
DAPI          rep1         1200                    1200   
DAPI          rep2         1200                    1200   

             FUSHeterozygous_Untreated FUSRevertant_Untreated  
Marker                                                         
G3BP1                              100                    100  
G3BP1                              100                    100  
NONO                               100                    100  
NONO                               100                    100  
SQSTM1                             100                    100  
SQSTM1                             100                    100  
PSD95                              100                    100  
PSD95                              100                    100  
NEMO                               100                    100  
NEMO                               100                    100  
GM130                              100                    100  
GM130                              100                    100  
NCL                                100                    100  
NCL                                100                    100  
ANXA11                             100                    100  
ANXA11                             100                    100  
Calreticulin                       100                    100  
Calreticulin                       100                    100  
mitotracker                        100                    100  
mitotracker                        100                    100  
KIF5A                              100                    100  
KIF5A                              100                    100  
TDP43                              100                    100  
TDP43                              100                    100  
FMRP                               100                    100  
FMRP                               100                    100  
CLTC                               100                    100  
CLTC                               100                    100  
DCP1A                              100                    100  
DCP1A                              100                    100  
TOMM20                             100                    100  
TOMM20                             100                    100  
FUS                                100                    100  
FUS                                100                    100  
SNCA                               100                    100  
SNCA                               100                    100  
LAMP1                              100                    100  
LAMP1                              100                    100  
PML                                100                    100  
PML                                100                    100  
PURA                               100                    100  
PURA                               100                    100  
Phalloidin                         100                    100  
Phalloidin                         100                    100  
PEX14                              100                    100  
PEX14                              100                    100  
Tubulin                            100                    100  
Tubulin                            100                    100  
PSPC1                              100                    100  
PSPC1                              100                    100  
VDAC1                              100                    100  
VDAC1                              100                    100  
AGO2                               100                    100  
AGO2                               100                    100  
HNRNPA1                            100                    100  
HNRNPA1                            100                    100  
DAPI                              1200                   1200  
DAPI                              1200                   1200  
========
====================
In [6]:
# Show the cell line -> conditions mapping that is passed to the folder-validation calls.
opera18days_cell_lines_to_cond
Out[6]:
{'WT': ['Untreated'],
 'FUSHomozygous': ['Untreated'],
 'FUSHeterozygous': ['Untreated'],
 'FUSRevertant': ['Untreated']}

Processed Files Validation¶

  1. How many site npy files do we have in each folder? -> How many sites survived the pre-processing?
  2. Are all existing files valid? (at least 100kB, npy not corrupted)
In [7]:
# Processed images of this dataset:
# <NOVA_DATA_HOME>/images/processed/ManuscriptFinalData_80pct/neuronsDay18
root_directory_proc = os.path.join(
    NOVA_DATA_HOME, 'images', 'processed', 'ManuscriptFinalData_80pct', 'neuronsDay18'
)

# NOTE(review): unlike the raw-files call, the markers list is passed here
# without .copy(); if the helper modifies it in place, later uses of
# opera18days_markers (e.g. len(...)) see the modified list - confirm this is intended.
procs = run_validate_folder_structure(
    root_directory_proc,
    True,
    opera18days_panels,
    opera18days_markers,
    PLOT_PATH,
    opera18days_marker_info,
    opera18days_cell_lines_to_cond,
    opera18days_reps,
    opera18days_cell_lines_for_disp,
    opera18days_expected_dapi_raw,
    batches=batches,
    fig_height=12,
)
batch1
Folder structure is valid.
No bad files are found.
Total Sites:  24518
df_reset (58, 5) colored_df (58, 5)
               Rep WT_Untreated FUSHomozygous_Untreated  \
Marker                                                    
G3BP1         rep1           86                      84   
G3BP1         rep2           97                      90   
NONO          rep1           74                      89   
NONO          rep2           81                      81   
SQSTM1        rep1           85                      85   
SQSTM1        rep2           62                      72   
PSD95         rep1           89                      57   
PSD95         rep2           91                      83   
NEMO          rep1           95                      96   
NEMO          rep2           91                      93   
GM130         rep1           23                      49   
GM130         rep2           96                      74   
NCL           rep1           95                      88   
NCL           rep2           90                      98   
ANXA11        rep1           97                      94   
ANXA11        rep2           98                      96   
Calreticulin  rep1           98                      99   
Calreticulin  rep2           94                      93   
mitotracker   rep1           80                      83   
mitotracker   rep2           81                      71   
KIF5A         rep1           23                      50   
KIF5A         rep2           95                      73   
TDP43         rep1           96                      99   
TDP43         rep2           96                      95   
FMRP          rep1           86                      86   
FMRP          rep2           65                      74   
CLTC          rep1           90                      57   
CLTC          rep2           91                      85   
DCP1A         rep1           96                      97   
DCP1A         rep2           97                      93   
TOMM20        rep1           86                      83   
TOMM20        rep2           97                      89   
FUS           rep1           95                      87   
FUS           rep2           90                      75   
SNCA          rep1           92                      91   
SNCA          rep2           97                      96   
LAMP1         rep1           97                      99   
LAMP1         rep2           94                      92   
PML           rep1           80                      84   
PML           rep2           75                      69   
PURA          rep1           86                      84   
PURA          rep2           97                      91   
Phalloidin    rep1           86                      86   
Phalloidin    rep2           65                      74   
PEX14         rep1           79                      84   
PEX14         rep2           83                      71   
Tubulin       rep1           64                      80   
Tubulin       rep2           97                      93   
PSPC1         rep1           65                      80   
PSPC1         rep2           97                      93   
VDAC1         rep1           74                      89   
VDAC1         rep2           81                      82   
AGO2          rep1           96                      99   
AGO2          rep2           96                      95   
HNRNPA1       rep1           65                      80   
HNRNPA1       rep2           97                      93   
DAPI          rep1          989                    1008   
DAPI          rep2         1084                    1044   

             FUSHeterozygous_Untreated FUSRevertant_Untreated  
Marker                                                         
G3BP1                               96                     21  
G3BP1                               88                     95  
NONO                                37                     46  
NONO                                62                     49  
SQSTM1                              83                     62  
SQSTM1                              43                     45  
PSD95                               67                     17  
PSD95                               86                     75  
NEMO                                99                     79  
NEMO                                90                     84  
GM130                               96                     94  
GM130                               99                     55  
NCL                                 60                     85  
NCL                                 95                     80  
ANXA11                              98                     77  
ANXA11                              97                     78  
Calreticulin                        90                     77  
Calreticulin                        97                     43  
mitotracker                         61                     62  
mitotracker                         30                     32  
KIF5A                               93                     94  
KIF5A                               98                     55  
TDP43                               82                     74  
TDP43                               92                     76  
FMRP                                84                     64  
FMRP                                46                     46  
CLTC                                71                     17  
CLTC                                89                     76  
DCP1A                              100                     80  
DCP1A                               96                     83  
TOMM20                              95                     20  
TOMM20                              82                     94  
FUS                                 60                     85  
FUS                                 92                     80  
SNCA                                96                     75  
SNCA                                92                     78  
LAMP1                               88                     77  
LAMP1                               97                     43  
PML                                 58                     55  
PML                                 29                     26  
PURA                                97                     21  
PURA                                90                     95  
Phalloidin                          83                     60  
Phalloidin                          43                     45  
PEX14                               62                     61  
PEX14                               31                     32  
Tubulin                              2                      8  
Tubulin                             85                     72  
PSPC1                                3                     11  
PSPC1                               85                     72  
VDAC1                               44                     54  
VDAC1                               63                     51  
AGO2                                81                     74  
AGO2                                92                     76  
HNRNPA1                              3                     11  
HNRNPA1                             85                     72  
DAPI                               889                    714  
DAPI                               978                    788  
========
batch2
Folder structure is invalid. Missing 3 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/SQSTM1
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/FMRP
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/Phalloidin
No bad files are found.
Total Sites:  22008
df_reset (58, 5) colored_df (58, 5)
               Rep WT_Untreated FUSHomozygous_Untreated  \
Marker                                                    
G3BP1         rep1           96                      91   
G3BP1         rep2           93                     100   
NONO          rep1           86                      15   
NONO          rep2           48                      71   
SQSTM1        rep1           92                      49   
SQSTM1        rep2           86                      21   
PSD95         rep1           67                      86   
PSD95         rep2           95                      89   
NEMO          rep1           99                      90   
NEMO          rep2           95                      93   
GM130         rep1           93                      98   
GM130         rep2           97                      94   
NCL           rep1           89                      94   
NCL           rep2           90                      89   
ANXA11        rep1           95                      96   
ANXA11        rep2           95                      99   
Calreticulin  rep1           95                      95   
Calreticulin  rep2           71                      80   
mitotracker   rep1           88                      68   
mitotracker   rep2           86                      48   
KIF5A         rep1           92                      97   
KIF5A         rep2           96                      93   
TDP43         rep1           95                      91   
TDP43         rep2           59                      92   
FMRP          rep1           98                      52   
FMRP          rep2           87                      22   
CLTC          rep1           88                      88   
CLTC          rep2           96                      91   
DCP1A         rep1           99                      94   
DCP1A         rep2           97                      96   
TOMM20        rep1           94                      31   
TOMM20        rep2           88                      78   
FUS           rep1           89                      94   
FUS           rep2           90                      90   
SNCA          rep1           92                      93   
SNCA          rep2           96                      91   
LAMP1         rep1           95                      95   
LAMP1         rep2           71                      79   
PML           rep1           91                      66   
PML           rep2           86                      43   
PURA          rep1           96                      92   
PURA          rep2           93                     100   
Phalloidin    rep1           99                      52   
Phalloidin    rep2           87                      22   
PEX14         rep1           79                      69   
PEX14         rep2           75                      49   
Tubulin       rep1          100                      82   
Tubulin       rep2           47                      31   
PSPC1         rep1           85                      79   
PSPC1         rep2           45                      31   
VDAC1         rep1           79                      30   
VDAC1         rep2           54                      70   
AGO2          rep1           95                      91   
AGO2          rep2           60                      92   
HNRNPA1       rep1          100                      82   
HNRNPA1       rep2           47                      31   
DAPI          rep1         1126                     979   
DAPI          rep2          979                     916   

             FUSHeterozygous_Untreated FUSRevertant_Untreated  
Marker                                                         
G3BP1                               86                     89  
G3BP1                               88                     73  
NONO                                52                     47  
NONO                                55                     39  
SQSTM1                              91                    NaN  
SQSTM1                              87                    NaN  
PSD95                               72                     62  
PSD95                               76                     72  
NEMO                                53                     42  
NEMO                                38                     49  
GM130                               65                     62  
GM130                               77                     76  
NCL                                 63                      5  
NCL                                 53                     52  
ANXA11                              74                     21  
ANXA11                              50                     21  
Calreticulin                        68                     42  
Calreticulin                        56                      3  
mitotracker                         53                     41  
mitotracker                         38                     23  
KIF5A                               65                     62  
KIF5A                               75                     76  
TDP43                               95                     91  
TDP43                               96                     94  
FMRP                                94                    NaN  
FMRP                                87                    NaN  
CLTC                                73                     63  
CLTC                                76                     71  
DCP1A                               53                     42  
DCP1A                               39                     49  
TOMM20                              80                     84  
TOMM20                              85                     71  
FUS                                 63                      5  
FUS                                 54                     52  
SNCA                                73                     21  
SNCA                                49                     21  
LAMP1                               67                     42  
LAMP1                               53                      3  
PML                                 52                     32  
PML                                 33                     23  
PURA                                86                     89  
PURA                                88                     73  
Phalloidin                          93                    NaN  
Phalloidin                          83                    NaN  
PEX14                               54                     41  
PEX14                               39                     25  
Tubulin                             77                     31  
Tubulin                             65                     62  
PSPC1                               72                     31  
PSPC1                               65                     60  
VDAC1                               54                     49  
VDAC1                               58                     40  
AGO2                                95                     91  
AGO2                                96                     94  
HNRNPA1                             76                     31  
HNRNPA1                             65                     62  
DAPI                               855                    536  
DAPI                               782                    571  
========
====================

Difference between Raw and Processed¶

In [8]:
# Compare the raw and processed validation results for each selected batch.
display_diff(
    batches,
    raws,
    procs,
    PLOT_PATH,
    fig_height=12,
)
batch1
========
batch2
========

Variance in each batch (of processed files)¶

In [9]:
# Report one variance value per batch, computed by sample_and_calc_variance on
# the processed files. Anything the helper prints is redirected into a
# throwaway buffer so that only the summary line below reaches the cell output.
# NOTE(review): the helper's name suggests random sampling — confirm a seed is
# set somewhere if these numbers need to be reproducible.
n_reps = len(opera18days_reps)
n_markers = len(opera18days_markers)

for batch in batches:
    discarded_stdout = io.StringIO()
    with contextlib.redirect_stdout(discarded_stdout):
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=200,
            cond_count=2,
            rep_count=n_reps,
            num_markers=n_markers,
        )
    print(f'{batch} var: ',var)
batch1 var:  0.041773310225922025
batch2 var:  0.040858276841131386

Preprocessing Filtering QC¶

By order of filtering

1. % site survival after Brenner on DAPI channel¶

Percentage out of the total sites

In [10]:
# Filtering step 1: site survival after the Brenner filter on the DAPI channel.
# The returned object is the input of the Cellpose survival step.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi,
    batches,
    opera18days_line_colors,
    opera18days_panels,
    figsize=(10, 6),
    reps=opera18days_reps,
)

2. % Site survival after Cellpose¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if Cellpose found 0 cells in it.

In [11]:
# Filtering step 2: site survival after Cellpose, computed on top of the
# Brenner-step result. The returned object is the input of the tiling step.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi,
    batches,
    dapi_filter_by_brenner,
    opera18days_line_colors,
    opera18days_panels,
    reps=opera18days_reps,
    figsize=(10, 6),
)

3. % Site survival by tiling¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if, after tiling, no tile contains at least one whole cell that Cellpose detected.

In [12]:
# Filtering step 3: site survival after tiling, computed on top of the
# Cellpose-step result. The returned object is the input of the target-channel
# Brenner step.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi,
    batches,
    dapi_filter_by_cellpose,
    opera18days_line_colors,
    opera18days_panels,
    figsize=(10, 6),
    reps=opera18days_reps,
)

4. % Site survival after Brenner on target channel¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values (if different from the percentages).

In [13]:
# Filtering step 4: site survival after the Brenner filter on the target
# channel, computed on top of the tiling-step result.
show_site_survival_target_brenner(
    df_dapi,
    df_target,
    dapi_filter_by_tiling,
    figsize=(10, 10),
    markers=opera18days_markers,
)

Statistics About the Processed Files¶

In [14]:
# NOTE(review): `names` is not referenced by any later cell of this notebook —
# confirm whether it is still needed.
names = [
    'Total number of tiles',
    'Total number of whole cells',
]

# Statistic columns handed to calc_total_sums.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]

total_sum = calc_total_sums(
    df_target,
    df_dapi,
    stats,
    opera18days_markers,
)

Total tiles¶

In [15]:
# Total number of valid tiles over every marker except TIA1.
# TODO (open question kept from the original cell): are we using FMRP?
# NOTE(review): `markers` is not defined in this part of the notebook — confirm
# it is set in an earlier cell (or whether opera18days_markers was intended).
markers_for_d18 = markers.copy()
markers_for_d18.remove('TIA1')
total_sum.loc[total_sum.marker.isin(markers_for_d18), 'n_valid_tiles'].sum()
Out[15]:
148742

Total whole nuclei in tiles¶

In [16]:
# Sum of whole-cell counts over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_whole_cells_counts_sum'].sum()
Out[16]:
34576.0

Total nuclei in sites¶

In [17]:
# Sum of per-site cell counts over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_cell_count'].sum()
Out[17]:
127146.0
In [18]:
# Render the summary-statistics tables for total_sum; the output below shows
# one table per batch followed by one for all batches.
show_total_sum_tables(total_sum)
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch1
count 320.000000 320.00000 320.000000 320.000000
mean 289.700000 2.89700 184.318750 671.237500
std 140.935046 1.40935 106.771803 312.327023
min 8.000000 0.08000 10.000000 19.000000
25% 190.000000 1.90000 115.000000 445.000000
50% 287.500000 2.87500 184.000000 642.000000
75% 402.000000 4.02000 229.000000 912.750000
max 582.000000 5.82000 1031.000000 2222.000000
sum 92704.000000 NaN 58982.000000 214796.000000
expected_count 450.000000 450.00000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch2
count 317.000000 317.000000 317.000000 317.000000
mean 256.053628 2.560536 151.261830 569.586751
std 146.087737 1.460877 88.699273 312.964252
min 0.000000 0.000000 0.000000 1.000000
25% 141.000000 1.410000 81.000000 324.000000
50% 234.000000 2.340000 154.000000 540.000000
75% 387.000000 3.870000 209.000000 856.000000
max 662.000000 6.620000 497.000000 1451.000000
sum 81169.000000 NaN 47950.000000 180559.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n valid tiles % valid tiles site_whole_cells_counts_sum site_cell_count
All batches
count 637.000000 637.000000 637.000000 637.000000
mean 272.956044 2.729560 167.868132 620.651491
std 144.394369 1.443944 99.502196 316.512225
min 0.000000 0.000000 0.000000 1.000000
25% 155.000000 1.550000 94.000000 373.000000
50% 276.000000 2.760000 163.000000 619.000000
75% 399.000000 3.990000 222.000000 870.000000
max 662.000000 6.620000 1031.000000 2222.000000
sum 173873.000000 NaN 106932.000000 395355.000000
expected_count 450.000000 450.000000 450.000000 450.000000

Show Total Tile Counts¶

For each batch, cell line, replicate and marker: total number of tiles

In [19]:
# Keep only the sites that still have at least one valid tile.
# NOTE(review): the section heading announces tile counts, but every plot in
# this cell uses a cell-count column — confirm which was intended.
has_valid_tiles = df_dapi.n_valid_tiles != 0
df_no_empty_sites = df_dapi[has_valid_tiles]

# Positional arguments shared by the three plots below.
shared_plot_args = (
    df_no_empty_sites,
    opera18days_lines_order,
    opera18days_custom_palette,
)

plot_cell_count(*shared_plot_args,
                y='site_cell_count_sum',
                title='Cell Count Average per Site (from tiles)')

plot_cell_count(*shared_plot_args,
                y='site_whole_cells_counts_sum',
                title='Whole Cell Count Average per Site')

plot_cell_count(*shared_plot_args,
                y='site_cell_count',
                title='Cellpose Cell Count Average per Site')

Show Cell Count Statistics per Batch¶

In [20]:
# Keep only the sites that still have at least one valid tile.
# NOTE(review): this cell repeats, statement for statement, the code of the
# "Show Total Tile Counts" cell — confirm whether both are needed.
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles != 0]

# (column plotted on y, figure title) for each of the three plots, in order.
cell_count_plots = [
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
]

for y_column, plot_title in cell_count_plots:
    plot_cell_count(
        df_no_empty_sites,
        opera18days_lines_order,
        opera18days_custom_palette,
        y=y_column,
        title=plot_title,
    )

Show Tiles per Site Statistics¶

In [21]:
# Mean of n_valid_tiles within each cell_line_cond group.
df_dapi.groupby(['cell_line_cond'])['n_valid_tiles'].mean()
Out[21]:
cell_line_cond
FUSHeterozygous Untreated    2.924984
FUSHomozygous Untreated      3.255590
FUSRevertant Untreated       1.609745
WT Untreated                 3.902980
Name: n_valid_tiles, dtype: float64
In [22]:
# Mean of site_cell_count over all rows of df_dapi; the list selector keeps the
# result a one-entry Series labelled with the column name.
df_dapi.loc[:, ['site_cell_count']].mean()
Out[22]:
site_cell_count    6.942937
dtype: float64
In [23]:
# Categorical plot of the valid-tile count for batches 1 to 2.
# NOTE(review): the SettingWithCopyWarning shown in the output is raised inside
# qc_utils.py (assignment of 'batch_rep'), not by this cell.
plot_catplot(
    df_dapi,
    opera18days_custom_palette,
    opera18days_reps,
    x='n_valid_tiles',
    x_title='valid tiles count',
    batch_min=1,
    batch_max=2,
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1063: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']

Show Mean of cell count in valid tiles¶

In [24]:
# Heatmap of the mean cell count per tile: split by 'rep', with 'cell_line'
# passed as rows and 'panel' as columns.
plot_hm_of_mean_cell_count_per_tile(
    df_dapi,
    split_by='rep',
    rows='cell_line',
    columns='panel',
)

Assessing Staining Reproducibility and Outliers¶

In [26]:
# for batch in batches:
#     print(batch)
#     run_calc_hist_new(f'{batch}',opera18days_cell_lines_for_disp, opera18days_markers, 
#                            root_directory_raw, root_directory_proc, hist_sample=10,
#                             sample_size_per_markers=200, ncols=7, nrows=5)
#     print("="*30)
In [ ]:
# # save notebook as HTML ( the HTML will be saved in the same folder the original script is)
# from IPython.display import display, Javascript
# display(Javascript('IPython.notebook.save_checkpoint();'))
# os.system(f'jupyter nbconvert --to html tools/preprocessing_tools/qc_reports/qc_report_d18_Opera_80pct.ipynb --output {NOVA_HOME}/manuscript/preprocessing_qc_reports/qc_report_d18_Opera_80pct.html')
In [ ]: